******SET YOUR DIRECTORY to ...\replication
*Every relative path used below (rawdata\... and intermediatedata\...) is written
*relative to that folder.


*Start from an empty session. The variable limit is raised because the share loops
*near the end of this file create two new variables per SIC code.
clear all
set maxvar 6000


***R&D industries
*Builds a SIC-level lookup saved as rawdata\CBPCounty\data_sic_rd.dta with
*  rd_industry        = 1 if at least one NAICS-2002 4-digit industry linked to the
*                       SIC code is on the R&D list, 0 otherwise
*  weight_rd_industry = share of the NAICS-2002 4-digit industries linked to the
*                       SIC code that are on the R&D list

*Step 1: NAICS 1997 to NAICS 2002 concordance, with both codes truncated to 4 digits
use rawdata\CBPCounty\MatchNAICS\naics_1997_2002.dta, clear

foreach v in naics02 naics97 {
    gen `v'_4dig=substr(string(`v'), 1,4)
    destring `v'_4dig, replace force
}

tempfile naics_bridge
save `naics_bridge'

*Step 2: SIC 1987 to NAICS 1997 concordance
use rawdata\CBPCounty\MatchNAICS\naics_1987_1997.dta, clear

rename naics naics97
keep sic naics97

destring sic, force replace
*Discard SIC codes below 100 and rows where either code is missing
drop if sic<100
drop if sic==.
drop if naics97==.

gen naics97_4dig=substr(string(naics97), 1,4)
destring naics97_4dig, replace force

*Form every SIC / NAICS-2002 pairing that shares a NAICS-1997 4-digit code
joinby naics97_4dig using `naics_bridge'

keep sic naics97 naics02 naics97_4dig naics02_4dig

tempfile sic_bridge
save `sic_bridge'

*Step 3: flag the R&D industries (list of NAICS-2002 4-digit codes)
import excel using rawdata\CBPCounty\MatchNAICS\rd_industry.xlsx, first clear
keep naics02
destring naics02, force replace
drop if naics02==.
rename naics02 naics02_4dig

gen rd_industry=1
*46 NAICS 4-digit industries are R&D

merge 1:m naics02_4dig using `sic_bridge'
*R&D codes with no SIC counterpart carry a missing sic. Left in, they would become
*a sic==. row flagged as R&D, and a later merge on sic would attach that row to
*every record whose own SIC code is missing. Remove them here.
drop if _merge==1
drop _merge

replace rd_industry=0 if rd_industry==.
gen weight_rd_industry=rd_industry

*First reduce to one row per SIC / NAICS-2002 pair, then average across the pairs
collapse (max) rd_industry (max) weight_rd_industry, by(sic naics02_4dig)

collapse (max) rd_industry (mean) weight_rd_industry, by(sic)

*186 SIC industries are R&D (average weight of R&D is 15%)

save "rawdata\CBPCounty\data_sic_rd.dta", replace


***CBP data

*1974: nine regional dictionary files (T2I1 = New England, T2I2-T2I9 = other regions)
*are read from inside the 1974 folder and stacked into one file.
*The starting directory is remembered and restored afterwards, because every other
*path in this do-file is written relative to the replication root.
local rootdir "`c(pwd)'"
cd "rawdata\CBPCounty\NationalArchives\1974"
tempfile cbp_74
forvalues i=1/9 {
    infile using RG029_CBP74_T2I`i'.dct, clear
    *County identifier = 2-digit state code followed by 3-digit county code.
    *Zero padding is added as text and the result is converted back to a number.
    *NOTE(review): the padding tests use fipscty while the text comes from fipscty2;
    *presumably fipscty is an abbreviation of fipscty2 - confirm against the dictionary.
    gen county = cond(fipstate<10,"0","") + string(fipstate) + ///
        cond(fipscty<10,"00",cond(fipscty<100,"0","")) + string(fipscty2)
    destring county, replace force
    gen year=1974
    rename tempmm emp
    rename siccode2 sic
    *Turn the dash placeholders into numbers: ---- becomes 0, trailing -- is removed
    replace sic=subinstr(sic, "----", "0",.)
    replace sic=subinstr(sic, "--", "",.)
    destring sic, replace force
    rename tanpay ap
    rename tflag empflag
    keep county year emp sic ap empflag fipstate
    sort county
    *From the second region on, add the regions already processed
    if `i'>1 {
        append using `cbp_74'
    }
    save `cbp_74', replace
}
*Back to the replication root before writing with a root-relative path
cd "`rootdir'"
sort county year
save "rawdata\CBPCounty\data_cbp_cny.dta", replace
*Other years (1975-1985)
*For each year, read the nine regional dictionary files (T2I1 = New England,
*T2I2-T2I9 = other regions), stack them, and add the year to the running county file.
forvalues j=75/85 {
    tempfile cbp_`j'
    forvalues i=1/9 {
        infile using rawdata\CBPCounty\NationalArchives\19`j'\RG029_CBP`j'_T2I`i'.dct, clear
        *County identifier = zero-padded state code followed by zero-padded county code
        gen county = cond(fipstate<10,"0","") + string(fipstate) + ///
            cond(fipscty<10,"00",cond(fipscty<100,"0","")) + string(fipscty2)
        destring county, replace force
        gen year=1900+`j'
        rename tempmm emp
        rename siccode2 sic
        *Dash placeholders: ---- becomes 0, a trailing -- is removed
        replace sic=subinstr(sic, "----", "0",.)
        replace sic=subinstr(sic, "--", "",.)
        destring sic, replace force
        rename tanpay ap
        rename tflag empflag
        keep county year emp sic ap empflag fipstate
        sort county
        *Regions after the first are stacked on top of those already saved
        if `i'>1 {
            append using `cbp_`j''
        }
        save `cbp_`j'', replace
    }
    *Add the completed year to the cumulative file
    sort county year
    append using "rawdata\CBPCounty\data_cbp_cny.dta"
    sort county year
    save "rawdata\CBPCounty\data_cbp_cny.dta", replace
}

*1986-1991
*These years come as one delimited text file per year; each is cleaned the same way
*and added to the cumulative county file.
forvalues j=86/91 {
    insheet using "rawdata\CBPCounty\cbp`j'co.txt", clear
    *County identifier = zero-padded state code followed by zero-padded county code
    gen county = cond(fipstate<10,"0","") + string(fipstate) + ///
        cond(fipscty<10,"00",cond(fipscty<100,"0","")) + string(fipscty)
    destring county, replace force
    gen year=1900+`j'
    *Dash placeholders: ---- becomes 0, a trailing -- is removed
    replace sic=subinstr(sic, "----", "0",.)
    replace sic=subinstr(sic, "--", "",.)
    destring sic, replace force
    keep county year emp sic ap empflag fipstate
    sort county
    append using "rawdata\CBPCounty\data_cbp_cny.dta"
    sort county year
    save "rawdata\CBPCounty\data_cbp_cny.dta", replace
}

*Final pass over the stacked county file
use "rawdata\CBPCounty\data_cbp_cny.dta", clear

*Before 1987 code 19 is recoded to 20 so the code is the same in every year
replace sic=20 if year<1987 & sic==19

*Order the file, then rename the identifier to the name used by the bridge file
sort county year
rename county fipscounty

save "rawdata\CBPCounty\data_cbp_cny.dta", replace



***From bridge obtain place-county-msa link
*Result: one row per county and MSA, carrying the code of the city-center place
*(missing when the county holds no city-center place), for MSAs that have one.
use "intermediatedata\bridge_final.dta", clear

drop if FMSA==9999

*Place code is kept only for places flagged as city center
gen place_cc=FPLACE if city_center==1

*Generate FIPS county code: 2-digit state followed by 3-digit county,
*zero-padded as text and then converted to a number
gen fipscounty = cond(FSTATE<10,"0","") + string(FSTATE) + ///
    cond(FCOUNTY<10,"00",cond(FCOUNTY<100,"0","")) + string(FCOUNTY) ///
    if FSTATE!=. & FCOUNTY!=.
destring fipscounty, replace force
drop if fipscounty==.

*Generate FIPS place code: 2-digit state followed by the place code.
*NOTE(review): place codes below 100 receive three padding zeros, so a code below 10
*would end up one digit short - confirm no such codes exist in the bridge.
gen fipsplace_00 = cond(FSTATE<10,"0","") + string(FSTATE) + ///
    cond(place_cc<100,"000",cond(place_cc<1000,"00",cond(place_cc<10000,"0",""))) + string(place_cc) ///
    if FSTATE!=. & place_cc!=.
destring fipsplace_00, replace force

*One row per county and MSA.
*NOTE(review): the place code is averaged, which is only meaningful if a county never
*holds two different city-center places - confirm.
collapse (mean) fipsplace_00, by(fipscounty FMSA)

*Drop counties not in MSA
drop if FMSA==.

*Drop if MSA does not have city center
egen msa_cc_code=mean(fipsplace_00), by(FMSA)
drop if msa_cc_code==.
drop msa_cc_code

***Merge with  CBP county employment
*Every bridge row is paired with every CBP row of the same county
joinby fipscounty using "rawdata\CBPCounty\data_cbp_cny.dta"

rename fipscounty fipscounty_00


*Generate sector
*NOTE(review): code 40 is treated as a division code further down but is not covered by
*the sic>40 test, so it receives no sector - confirm this is intended.
gen superind=1 if sic==7
replace superind=2 if inlist(sic, 10, 15, 20)
replace superind=3 if (sic>40 & sic<99)
replace superind=4 if sic==99
label define superindl 1 "Agriculture"  2 "Goods producing industries" 3 "Service producing industries" 4 "Other industries"
label values superind superindl

*Generate two digit
*sic_digit = 2 for the listed short codes, 3 for codes of 1000 or more ending in 00,
*4 for all other codes of 1000 or more.
*NOTE(review): codes from 100 to 999 fail the sic>=1000 test and keep a missing
*sic_digit - confirm this is intended.
gen sic_tail=substr(string(sic), -2,.) if sic>=1000
gen sic_digit=2 if inlist(sic, 0, 7, 10, 15, 20, 40, 50, 52, 60, 70, 99)
replace sic_digit=3 if sic_tail=="00"
replace sic_digit=4 if sic_tail!="00" & sic_tail!=""
drop sic_tail

*Merge with R&D industries
*The lookup is read from the location where the R&D section of this do-file saves it,
*written relative to the replication root like every other path here. It is spelled out
*rather than built from a global macro because this file defines no global.
merge m:1 sic using "rawdata\CBPCounty\data_sic_rd.dta"
*Keep all employment rows; discard lookup rows whose SIC code never occurs in the data
drop if _merge==2
drop _merge
*28 SIC industries with R&D are unmatched

*Compute proportion of employment in central county according to R&D industries
*For each industry group the loop below creates
*  emp_msa_X          employment total of the group within MSA and year
*  emp_cc_X           the same total over rows that carry a city-center place code
*  perc_empl_ctycc_X  emp_cc_X / emp_msa_X (set to 1 when both totals are 0),
*                     filled in only on rows that carry a city-center place code
*X runs over rd, rd_wt, rd_sic20, rd_sic20_wt, rd_nosic20, rd_nosic20_wt,
*nord, nord_sic20, nord_nosic20 (wt = employment multiplied by the R&D weight,
*sic20 = manufacturing, nosic20 = not manufacturing).
replace rd_industry=0 if rd_industry==.
gen emp_wt_rd=emp*weight_rd_industry
*Manufacturing flag: SIC 2000-3999; left missing on rows with sic_digit 2 or 3
gen manuf=(sic>=2000 & sic<4000) if !inlist(sic_digit, 2, 3)

foreach grp in rd nord {
    *rd selects rd_industry==1, nord selects rd_industry==0
    local rdval = ("`grp'"=="rd")
    foreach sect in all sic20 nosic20 {
        local stub `grp'
        local sample rd_industry==`rdval'
        if "`sect'"=="sic20" {
            local stub `stub'_sic20
            local sample `sample' & manuf==1
        }
        if "`sect'"=="nosic20" {
            local stub `stub'_nosic20
            local sample `sample' & manuf==0
        }
        *Weighted versions exist only for the R&D group
        local measures emp
        if `rdval' {
            local measures emp emp_wt_rd
        }
        foreach m of local measures {
            local out `stub'
            if "`m'"=="emp_wt_rd" {
                local out `stub'_wt
            }
            tempvar share
            egen emp_msa_`out'=total(`m') if `sample', by(FMSA year)
            egen emp_cc_`out'=total(`m') if `sample' & fipsplace_00!=., by(FMSA year)
            gen `share'=emp_cc_`out'/emp_msa_`out' if fipsplace_00!=.
            replace `share'=1 if emp_cc_`out'==0 & emp_msa_`out'==0 & fipsplace_00!=.
            *Spread the share to every city-center row of the MSA and year
            egen perc_empl_ctycc_`out'=max(`share') if fipsplace_00!=., by(FMSA year)
            drop `share'
        }
    }
}

*Keep 2 and 3 digit SIC
*(rows whose sic_digit is missing are not removed by this line)
drop if sic_digit==4

*Compute proportion of employment in central county
*One pair of variables per SIC code: the MSA-year employment total of that code, and
*the row's own employment divided by that total (1 when both are 0), the latter
*only on rows that carry a city-center place code. Codes with sic_digit 2 are
*processed first, then codes with sic_digit 3.
foreach d in 2 3 {
    levelsof sic if sic_digit==`d', local(codes)
    foreach s of local codes {
        egen emp_msa_sic`s'=total(emp) if sic==`s', by(FMSA year)
        gen perc_empl_ctycc_sic`s'=emp/emp_msa_sic`s' if fipsplace_00!=.
        replace perc_empl_ctycc_sic`s'=1 if emp==0 & emp_msa_sic`s'==0 & fipsplace_00!=.
    }
}

*Same idea per sector: the numerator is the sector total within place and year,
*the denominator the sector total within MSA and year
levelsof superind, local(sectors)
foreach k of local sectors {
    egen emp_msa_ind`k'=total(emp) if superind==`k', by(FMSA year)
    egen emp_ind_ctycc_ind`k'=total(emp) if superind==`k' & fipsplace_00!=., by(fipsplace_00 year)
    gen perc_empl_ctycc_ind`k'=emp_ind_ctycc_ind`k'/emp_msa_ind`k' if fipsplace_00!=.
    replace perc_empl_ctycc_ind`k'=1 if emp_ind_ctycc_ind`k'==0 & emp_msa_ind`k'==0 & fipsplace_00!=.
}

*One row per city-center place and year, averaging all shares and MSA totals
collapse (mean) perc_* emp_msa_* ,by(fipsplace_00 year)

save rawdata\data_cbp.dta, replace
